# NOTE: a run of line numbers (1-83) stood here; it was gutter residue from a code-listing export, not part of the script.
   |  import torch model = torch.load('t-sne模型文件/AliExpress_US_sharedbottom.pt') model.eval()
  from datasets.aliexpress import AliExpressDataset train_dataset = AliExpressDataset('tsne.csv')
  from torch.utils.data import DataLoader import random  import numpy as np seed = 2022 random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) train_data_loader = DataLoader(train_dataset, batch_size=1, num_workers=4, shuffle=False)
 
  """t-SNE对手写数字进行可视化""" from time import time import numpy as np import matplotlib.pyplot as plt import tqdm device = 'cuda:0' from sklearn import datasets from sklearn.manifold import TSNE from sklearn.decomposition import PCA
 
  def plot_embedding(data, label, title):     x_min, x_max = np.min(data, 0), np.max(data, 0)     data = (data - x_min) / (x_max - x_min)
      fig = plt.figure()     ax = plt.subplot(111)     colors = ['r', 'b']     for i in range(data.shape[0]):         plt.scatter(data[i, 0], data[i, 1], c=colors[label[i]], alpha=0.5)     plt.xticks([])     plt.yticks([])     plt.title(title)     return fig
 
  def main():     with torch.no_grad():         data1 = []         data2 = []         for categorical_fields, numerical_fields, labels in tqdm.tqdm(train_data_loader, smoothing=0, mininterval=1.0):             categorical_fields, numerical_fields, labels = categorical_fields.to(device), numerical_fields.to(device), labels.to(device)             y, _ ,cross= model(categorical_fields, numerical_fields)             data1 += cross[0].cpu().numpy().tolist()             data2 += cross[1].cpu().numpy().tolist()                  data = np.concatenate((data1, data2), axis=0)         label = len(data1)*[0] + len(data2)*[1]                  n_samples, n_features = data.shape         print(data.shape)     print('data.shape',data.shape)           print('label',label)     print('label中数字有',len(set(label)),'个不同的数字')     print('data有',n_samples,'个样本')     print('每个样本',n_features,'维数据')     print('Computing t-SNE embedding')     tsne = TSNE(n_components=2, init='pca', random_state=0)          t0 = time()          result = tsne.fit_transform(data)     print('result.shape',result.shape)     fig = plot_embedding(result, label,                          't-SNE embedding of the digits (time %.2fs)'                          % (time() - t0))     plt.show(fig)
 
  if __name__ == '__main__':     main()
 
 
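# NOTE: a stray "|" table delimiter from the code-listing export stood here; it was not part of the script.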